{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib as plt\n",
    "\n",
    "%matplotlib inline\n",
    "# set default diplay row count\n",
    "# pd.options.display.max_rows=100"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## concat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = np.array([x+str(y) for y in range(12) for x in list('ABCD')])\n",
    "print('type(data): ', type(data))\n",
    "data = data.reshape(12,4)\n",
    "print('data.shape: ', data.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame(data[:4], columns=['A', 'B', 'C', 'D'])\n",
    "df2 = pd.DataFrame(data[4:8], columns=['A', 'B', 'C', 'D'])\n",
    "df3 = pd.DataFrame(data[8:12], columns=['A', 'B', 'C', 'D'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "res1 = pd.concat([df1,df2,df3], ignore_index=True)\n",
    "res1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# keys cannot used with ignore_index=True\n",
    "res2 = pd.concat([df1, df2, df3], keys=['x','y'], ignore_index=False)\n",
    "res2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res2.loc['x']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data4 = np.array([x+str(y) for y in [2,3,6,7] for x in list('BDE')])\n",
    "data4 = data4.reshape(4, 3)\n",
    "df4 = pd.DataFrame(data4, index=[2,3,6,7], columns=['B','D','E'])\n",
    "df4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# default: outer\n",
    "res3 = pd.concat([df1, df4], axis=1)\n",
    "res3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res4 = pd.concat([df1, df4], axis=1, join='inner')\n",
    "res4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res5 = pd.concat([df1, df4], axis=1, join_axes=[df1.index])\n",
    "res5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res6 = pd.concat([df1, df4], axis=1, join_axes=[df1.index], ignore_index=True)\n",
    "res6"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## append"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "app1 = df1.append(df2)\n",
    "app1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# H and V fill\n",
    "app2 = df1.append(df4, sort=False)\n",
    "app2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## merge"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "left = pd.DataFrame(\n",
    "    [['K0', 'A0', 'B0'],['K1', 'A1', 'B1'],['K2', 'A2', 'B2'], ['K3', 'A3', 'B3']],\n",
    "    columns=['K','A','B'])\n",
    "left                             "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "right = pd.DataFrame(\n",
    "    [['K0', 'C0', 'D0'],['K1', 'C1', 'D1'],['K2', 'C2', 'D2'], ['K3', 'C3', 'D3']],\n",
    "    columns=['K','C','D'])\n",
    "right                            "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "merg1 = pd.merge(left, right)\n",
    "# merg1 = pd.merge(left, right, on='K')\n",
    "merg1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "left2 = pd.DataFrame(\n",
    "    [['K0', 'J0', 'A0', 'B0'],['K0', 'J1', 'A1', 'B1'],['K1', 'J0', 'A2', 'B2'], ['K2', 'J1', 'A3', 'B3']],\n",
    "    columns=['K', 'J', 'A','B'])\n",
    "left2                            "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "right2 = pd.DataFrame(\n",
    "    [['K0', 'J0', 'A0', 'B0'],['K1', 'J0', 'A1', 'B1'],['K1', 'J0', 'A2', 'B2'], ['K2', 'J0', 'A3', 'B3']],\n",
    "    columns=['K', 'J', 'A','B'])\n",
    "right2                            "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# default: how = 'inner'\n",
    "mrg2 = pd.merge(left2, right2, on=['K', 'J'], how='inner')\n",
    "mrg2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mrg3 = pd.merge(left2, right2, on=['K', 'J'], how='outer')\n",
    "mrg3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mrg4 = pd.merge(left2, right2, on=['K', 'J'], how='left')\n",
    "mrg4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mrg5 = pd.merge(left2, right2, on=['K', 'J'], how='right')\n",
    "mrg5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 其他"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "oth_df = pd.DataFrame({'col1':[1,2], 'col2':[2,3]})\n",
    "oth_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "oth_df.loc[2] = np.array([3,4])\n",
    "oth_df\n",
    "\n",
    "# error: single positional indexer is out-of-bounds\n",
    "# oth_df.iloc[3] = np.array([4,5])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
